######################################################################
# Title:  Do Political Finance Reforms Really Reduce Corruption? A Replication Study

# Description: R code to replicate the results in the article: online appendix
#           
# Authors:  Sergiu Lipcean & Casal Bértoa Fernando
# Date:     05.12.2024 
##################################################################

setwd("set_your_working_directory_that_contains_data")

## Download required packages 

library(tidyverse)
library(fixest)
library(broom)
library(modelsummary)
library(marginaleffects)
library(patchwork)
library(scales)
library(kableExtra)
library(NatParksPalettes)

###  Table A1: Description of PFSI components

pfsi_components <- tibble(
  Variable = c("Campaign subsidies de jure (campaignfin_onpaper)",
               "Campaign subsidies de facto (campaigns_pubfin)",
               "Party subsidies de jure (partyfin_onpaper)",
               "Party subsidies de facto (partyorg_pubfin)",
               "Majority public money (majority_pubfin)"),
  Definition = c("National law legislates public subsidies to political campaigns",
                 "Observers document the government disbursing campaign subsidies",
                 "National law legislates public subsidies to party organizations",
                 "Observers document the government disbursing party subsidies",
                 "The majority of money in political campaigns comes from the government"),
  Sources = c("National laws, experts",
              "Supranational reports, election observer reports, experts",
              "National laws, experts",
              "Supranational reports, election observer reports, experts",
              "Global Integrity data, GRECO reports, government statistics, experts")
)

kbl(pfsi_components, booktabs = TRUE, longtable = T, caption = "Description of PFSI components") %>%
  kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position", "striped")) %>%
  column_spec(column = 1:3, width = "5cm") 


## Table A2: Discrepancies between the coding of public funding for party statutory and election activities in the HGB dataset and the de jure introduction of state subsidies

yugoslavia_table <- tibble(
  Country = c("Slovenia", "Croatia", "Macedonia", "Serbia", "Montenegro", "Albania"),
  `HGB: statutory funding` = c(2005, 2006, 2004, 2003, 2008, 2000),
  `Introduction of statutory funding` = c(1989, 1993, 1994, 1991, 1993, 1991),
  `HGB: electoral funding` = c(2005, 2006, 2006, 2003, 2008, 2000),
  `Introduction of electoral funding` = c(1994, 1992, 1990, 1992, 1990, 1991))

kbl(yugoslavia_table, align = "lcccc", booktabs = TRUE, longtable = T, caption = "Discrepancies between the coding of public funding for party statutory and election activities in the HGB dataset and the de jure introduction of state subsidies") %>%
  kable_styling(font_size = 11, full_width = F, latex_options = c( "hold_position", "striped")) %>%
  column_spec(column = 1:5, width = "3cm") 

### Import the main dataset from the working directory 

pf_data <- readRDS("pfsi_data.rds") %>%
  #rename("Country" = country_name, "Year" = year) %>%
  group_by(country_id) %>%
  mutate(across(.cols = c(v2elpubfinV8, v2x_corrV8, v2x_execorrV8, v2lgcrrptflip, v2x_pubcorrV8, v2jucorrdcflip, v2mecorrptflip), ~dplyr::lead(.x, 5), .names = "{.col}_lead")) %>% # construct 5 year forward lags as in Hummel et al.
  ungroup()


### Create a custom theme 
custom_theme <- 
  theme_light(base_family = "serif") +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_line(linetype = 3),
        legend.position = "none",
        plot.caption.position = "plot",
        plot.caption = element_text(size = 11, hjust = 0),
        axis.text = element_text(size = 11, hjust = 0.5),
        axis.title = element_text(size = 11, hjust = 0.5))

### Set a custom theme for all figures
theme_set(custom_theme)

## Data management for Figure A1: Relationship between the share of public funding in party income and alternative operationalisations of PFSI

## Disclaimer: Since data used to build Figure A1 is not in open access, I do not make it available by default. However, I can provide it on request, this relates to the variable dpf_share without which you won't be able to replicate Figure A1

cor_pubfin_add <- cor(pf_data$dpf_share, pf_data$pubfin_add, use = "complete.obs")
cor_pubfin_add_short <- cor(pf_data$dpf_share, pf_data$pubfin_add_short, use = "complete.obs")
cor_pfsi_recoded <- cor(pf_data$dpf_share, pf_data$pfsi_recoded, use = "complete.obs")
cor_pubfinindexlog <- cor(pf_data$dpf_share, pf_data$pubfinindexlog, use = "complete.obs")
cor_pubfinindexstock1log <- cor(pf_data$dpf_share, pf_data$pubfinindexstock1log, use = "complete.obs")

### Arrange correlation scores into a data frame
dpf_share_correlation <- 
  data.frame(pfsi_indices = c("pubfin_add", "pubfin_add_short", "pfsi_recoded", "pubfinindexlog", "pubfinindexstock1log"),
             correlation_scores = round(c(cor_pubfin_add, cor_pubfin_add_short, cor_pfsi_recoded, cor_pubfinindexlog, cor_pubfinindexstock1log), 2))

### Transform data from wide to long format for plotting Figure A1

pf_data_share <- pf_data %>%
  select(author, Year, dpf_share, pubfin_add, pubfin_add_short, pfsi_recoded,  pubfinindexlog, pubfinindexstock1log) %>%
  pivot_longer(cols = pubfin_add:pubfinindexstock1log, names_to = "pfsi_indices", values_to = "pfsi_scores") %>%
  mutate(
    pfsi_labels = case_when(pfsi_indices == "pubfin_add" ~ "PFSI additive",
                            pfsi_indices == "pubfin_add_short" ~ "PFSI additive & no duplicates",
                            pfsi_indices == "pfsi_recoded" ~ "PFSI additive recoded",
                            pfsi_indices == "pubfinindexlog" ~ "PFSI additive logged",
                            pfsi_indices == "pubfinindexstock1log" ~ "PFSI stock logged",
                            .default = NA_character_)) %>%
  drop_na(author) %>%
  left_join(dpf_share_correlation, by = "pfsi_indices") %>%
  mutate(correlation_scores = ifelse(Year == 2009 & dpf_share == 15.0, correlation_scores, NA_real_))
  
## Figure A1: Relationship between the share of public funding in party income and alternative operationalisations of PFSI

pf_data_share %>%
  ggplot(aes(x = dpf_share, y = pfsi_scores)) + 
  geom_point(aes(colour = author, shape = author)) +
  scale_colour_natparks_d("Yellowstone") +
  scale_y_continuous(breaks = seq(0,6,1)) +
  geom_text(aes(label = ifelse(!is.na(correlation_scores), paste0("italic(r)==", correlation_scores), NA)), 
            family = "serif", size = 3.5, x = 10, y = 5.5, parse = TRUE) +
  #geom_text(aes(label = ifelse(!is.na(correlation_scores), paste0("italic(r)^2==", correlation_scores), NA)), family = "serif", size = 3, x = 80, y = 4.5, parse = TRUE) +
  facet_wrap(~pfsi_labels, ncol = 3) +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey10",face = "bold", size = 11),
        axis.title = element_text(colour = "grey10", size = 12),
        axis.text = element_text(colour = "grey10", size = 11),
        legend.text = element_text(colour = "grey10", size = 11),
        legend.title = element_blank(),
        legend.position = c(0.85, 0.2)) +
  labs(x = "Direct public funding share in party income %", y = "PFSI scores")


## Data management for Figure A2: Relationship between the level of direct public funding per vote and alternative operationalisations of PFSI

corpc_pubfin_add <- cor(pf_data$dpfv_total, pf_data$pubfin_add, use = "complete.obs")
corpc_pubfin_add_short <- cor(pf_data$dpfv_total, pf_data$pubfin_add_short, use = "complete.obs")
corpc_pfsi_recoded<- cor(pf_data$dpfv_total, pf_data$pfsi_recoded, use = "complete.obs")
corpc_pubfinindexlog <- cor(pf_data$dpfv_total, pf_data$pubfinindexlog, use = "complete.obs")
corpc_pubfinindexstock1log <- cor(pf_data$dpfv_total, pf_data$pubfinindexstock1log, use = "complete.obs")

### Organise the correlation scores into a data frame

dpf_postcom_correlation <- 
  data.frame(pfsi_indices = c("pubfin_add", "pubfin_add_short", "pfsi_recoded", "pubfinindexlog", "pubfinindexstock1log"),
             correlation_scores = round(c(corpc_pubfin_add, corpc_pubfin_add_short, corpc_pfsi_recoded, corpc_pubfinindexlog, corpc_pubfinindexstock1log), 2))

### Transform data from wide to long format for plotting Figure A2

pf_data_postcom <- pf_data %>%
  select(country_id, Year, dpfv_total, pubfin_add, pubfin_add_short, pfsi_recoded,  pubfinindexlog, pubfinindexstock1log) %>%
  pivot_longer(cols = pubfin_add:pubfinindexstock1log, names_to = "pfsi_indices", values_to = "pfsi_scores") %>%
  mutate(
    pfsi_labels = case_when(pfsi_indices == "pubfin_add" ~ "PFSI additive",
                            pfsi_indices == "pubfin_add_short" ~ "PFSI additive & no duplicates",
                            pfsi_indices == "pfsi_recoded" ~ "PFSI additive recoded",
                            pfsi_indices == "pubfinindexlog" ~ "PFSI additive logged",
                            pfsi_indices == "pubfinindexstock1log" ~ "PFSI stock logged",
                            .default = NA_character_)) %>%
  drop_na(dpfv_total) %>%
  left_join(dpf_postcom_correlation, by = "pfsi_indices") %>%
  mutate(correlation_scores = ifelse(Year == 2009 & country_id == "ALB", correlation_scores, NA_real_))


## Figure A2: Relationship between the level of direct public funding per vote and alternative operationalisations of PFSI

pf_data_postcom %>%
  ggplot(aes(x = dpfv_total, y = pfsi_scores)) + 
  geom_jitter(colour = "steelblue4", shape = 1) +
  #scale_colour_natparks_d("Yellowstone") +
  scale_y_continuous(breaks = seq(0,6,1)) +
  geom_text(aes(label = ifelse(!is.na(correlation_scores), paste0("italic(r)==", correlation_scores), NA)), family = "serif", size = 3.5, x = 14, y = 4.8, parse = TRUE) +
  facet_wrap(~pfsi_labels, ncol = 3) +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey10",face = "bold", size = 11),
        axis.text = element_text(colour = "grey10", size = 11),
        legend.text = element_text(colour = "grey10", size = 11),
        legend.title = element_blank(),
        legend.position = c(0.8, 0.2)) +
  labs(x = "Direct public funding per vote in $", y = "PFSI scores")


## Z-tests for coefficient differences between alternative model specifications for the Global sample displayed in Panel A, Figure 3 in the manuscript

### Set panel data
pf_data_panel <- panel(pf_data, ~Country + Year)

## Fit Two-Way Fixed-Effects models using original model specification in Hummel et al. and their stock-based measure of public funding vs the same model specifications and alternative operationalisations of public funding: period 1900–2015 

m1z <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway")

m2z <- feols(v2x_corrV8 ~ pubfin_add_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway")

m3z <- feols(v2x_corrV8 ~ pubfin_add_short_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway")

m4z <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway")

# Extract coefficients and standard errors
coef_m1 <- tidy(m1z)
coef_m2 <- tidy(m2z)
coef_m3 <- tidy(m3z)
coef_m4 <- tidy(m4z)

# Select the specific coefficients to compare
coef1 <- coef_m1[coef_m1$term == "pubfinindexstock1log_l5", c("estimate", "std.error")]
coef2 <- coef_m2[coef_m2$term == "pubfin_add_l5", c("estimate", "std.error")]
coef3 <- coef_m3[coef_m3$term == "pubfin_add_short_l5", c("estimate", "std.error")]
coef4 <- coef_m4[coef_m4$term == "pfsi_recoded_l5", c("estimate", "std.error")]

# Compute difference, z-score, and p-value 
diff1 <- coef1$estimate - coef2$estimate
se_diff1 <- sqrt(coef1$std.error^2 + coef2$std.error^2)
z1 <- diff1 / se_diff1
p_value1 <- 2 * (1 - pnorm(abs(z1)))

# Compute difference, z-score, and p-value
diff2 <- coef1$estimate - coef3$estimate
se_diff2 <- sqrt(coef1$std.error^2 + coef3$std.error^2)
z2 <- diff2 / se_diff2
p_value2 <- 2 * (1 - pnorm(abs(z2)))

# Compute difference, z-score, and p-value
diff3 <- coef1$estimate - coef4$estimate
se_diff3 <- sqrt(coef1$std.error^2 + coef4$std.error^2)
z3 <- diff3 / se_diff3
p_value3 <- 2 * (1 - pnorm(abs(z3)))

metrics <- data.frame(`Models comparison` = 
                        c("PFSI Stock vs. PFSI Additive",
                          "PFSI Stock vs. PFSI additive: No duplicates",
                          "PFSI Stock vs. PFSI additive: No duplicates & recoded"),
                      `Difference in coefficients` = c(diff1, diff2, diff3),
                      `Z-score` = c(z1, z2, z3),
                      `P-value` = c(p_value1, p_value2, p_value3)) %>%
  rename_with(.fn = ~str_replace_all(.x, "\\.", " "))

## Table A3: Z-tests for coefficient differences between alternative model specifications

kbl(metrics, 
    booktabs = TRUE, 
    longtable = T,
    align = "lrrr",
    caption = "Z-tests for coefficient differences between alternative model specifications") %>%
  kable_styling(font_size = 11, full_width = F, 
                latex_options = c("hold_position", "striped")) %>%
  #row_spec(c(1:4), hline_after = T) %>%
  column_spec(column = 1, width = "6cm") %>%
  column_spec(column = 2, width = "4cm") %>%
  column_spec(column = 3, width = "2cm") %>%
  column_spec(column = 4, width = "2cm")

## Appendix B: Regression summaries for two-way fixed effects models on the relationship between PFSI and corruption using alternative operationalisation of PFSI displayed in Figure 3 in the main text

# Define clean term labels to use in regression tables
model_labels <- c("pubfinindexstock1log_l5" = "PFSI original",
                  "pubfin_add_l5" = "PFSI additive",
                  "pubfin_add_short_l5" = "PFSI additive: No duplicates",
                  "pfsi_recoded_l5" = "PFSI additive: No duplicates & recoded",
                  "v2x_polyarchyV8_l5" = "Poliarchy",
                  "I(v2x_polyarchyV8_l5^2)" = "Poliarchy squared",
                  "Fariss_Maddison_gdppc_1990_ln_l5" = "GDP per capita",
                  "Fariss_e_miurbani_l5" = "Urbanisation",
                  "Fariss_gdp_growth_l5" = "GDP growth",
                  "v2x_elecregV8_l5" = "Regular elections")

## Fit models for Table B1 

twfe_by_region_full <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

### Table B1

modelsummary(twfe_by_region_full, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI stock, sample 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table B2

twfe_by_region_simple <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table B2

modelsummary(twfe_by_region_simple, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive, sample 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%  
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table B3

twfe_by_region_short <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_short_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))


## Table B3

modelsummary(twfe_by_region_short, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive -- No duplicates, sample 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table B4

twfe_by_region_recoded <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table B4

modelsummary(twfe_by_region_recoded, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive -- No duplicates & recoded, sample 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


### Fit TWFE models with data for the period 1960-2015

pf_data_cut <- pf_data %>% filter(Year > 1960)

## Transform to panel data
pf_data_panel_cut <- panel(pf_data_cut, ~Country + Year)

## Fit models for Table B5

twfe_by_region_stock_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

## Table B5

modelsummary(twfe_by_region_stock_cut, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI stock, sample 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table B6

twfe_by_region_simple_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

## Table B6

modelsummary(twfe_by_region_simple_cut, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive, sample 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table B7

twfe_by_region_short_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfin_add_short_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

## Table B7

modelsummary(twfe_by_region_short_cut, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive -- No duplicates, sample 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table B8

twfe_by_region_recoded_cut <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country + Year, data = pf_data_panel_cut, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel_cut, region_new == "Western Europe and North America")))

## Table B8

modelsummary(twfe_by_region_recoded_cut, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Relationship between PFSI and political corruption for the global sample and regional samples: PFSI additive -- No duplicates & recoded, sample 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Appendix C: Regression summaries for two-way, country and year fixed-effect models on the relationship between PFSI and corruption displayed in Figure 4 in the main text

## Fit models for Table C1

twfe_by_region_country <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C1

modelsummary(twfe_by_region_country, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Country fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI stock, 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country and year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table C2

twfe_by_region_year <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C2

modelsummary(twfe_by_region_year, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Year fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI stock, 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table C3

twfe_by_region_country_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C3

modelsummary(twfe_by_region_country_sh, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Country fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI stock, 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table C4

twfe_by_region_year_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pubfinindexstock1log_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C4

modelsummary(twfe_by_region_year_sh, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Year fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI stock, 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table C5

recoded_by_region_country <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C5

modelsummary(recoded_by_region_country, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Country fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI Additive: No duplicates & recoded, 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table C6

recoded_by_region_year <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = pf_data_panel, vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C6

modelsummary(recoded_by_region_year, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Year fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI Additive: No duplicates & recoded, 1900-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table C7

recoded_by_region_country_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Country, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C7

modelsummary(recoded_by_region_country_sh, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Country fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI Additive: No duplicates & recoded, 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by country", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Fit models for Table C8

recoded_by_region_year_sh <- list(
  "Global"  = m1 <- feols(v2x_corrV8 ~ pfsi_recoded_l5 + v2x_polyarchyV8_l5 + I(v2x_polyarchyV8_l5^2) + Fariss_Maddison_gdppc_1990_ln_l5 + Fariss_e_miurbani_l5 + Fariss_gdp_growth_l5 + v2x_elecregV8_l5 | Year, data = subset(pf_data_panel, Year > 1960), vcov = "twoway"),
  "South-East Asia & the Pacific" = m2 <- update(m1, .~., data = subset(pf_data_panel, region_new == "South-East Asia & the Pacific")),
  "Eastern Europe & Central Asia" = m3 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Eastern Europe and Central Asia (post-Communist)")),
  "Middle East & North Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "MENA (Middle East & North Africa)")),
  "Sub-Saharan Africa" = m4 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Sub-Saharan Africa")),
  "Latin America & the Caribbean" = m5 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Latin America & the Caribbean")),
  "Western Europe & North America" = m6 <- update(m1, .~., data = subset(pf_data_panel, region_new == "Western Europe and North America")))

## Table C8

modelsummary(recoded_by_region_year_sh, stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Year fixed-effects models on the relationship between PFSI and political corruption for the global and regional samples: PFSI Additive: No duplicates & recoded, 1960-2015") %>%
    kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
    column_spec(column = 1, width = "3.5cm") %>%
    column_spec(column = 2:8, width = "1.5cm") %>%
    footnote(general = "Table entries represent unstandardised OLS estimates with robust standard errors clustered by year", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Appendix D: Regression summaries on the relationship between PFSI and corruption: Tables 2, 4 and 5 in Hummel et al. study

## Fit models for Table D1: Paraguay models

paraguay <- pf_data %>% filter(country == 189)

## Model 1 in Table 2 in Hummel et al. article

paraguay_ols1 <- lm(dplyr::lead(v2x_corrV8, 5) ~ v2x_corrV8 + pubfinindexstock1log, data = paraguay)

### Model 2 in Table 2 in Hummel et al. article

paraguay_ols2 <- lm(dplyr::lead(v2x_corrV8, 5) ~ v2x_corrV8 + pubfinindexstock1log + v2x_polyarchyV8 + I(v2x_polyarchyV8^2) + Fariss_Maddison_gdppc_1990_ln, data = paraguay)

### Model 3 in Table 2 in Hummel et al. article

paraguay_ols3 <- lm(dplyr::lead(v2x_corrV8, 5) ~ v2x_corrV8 + pubfinindexstock1log + v2x_polyarchyV8 + I(v2x_polyarchyV8^2) + Fariss_Maddison_gdppc_1990_ln + Fariss_gdp_growth + Fariss_e_miurbani + factor(v2x_elecregV8), data = paraguay)

### Clean model terms for Paraguay models 
paraguay_model_terms <- 
  c("pubfinindexstock1log" = "PFSI stock (ln)",
    "v2x_corrV8" = "Lagged corruption index",
    "v2x_polyarchyV8" = "Polyarchy", 
    "I(v2x_polyarchyV8^2)" = "Polyarchy²",
    "Fariss_Maddison_gdppc_1990_ln" = "GDP per capita (log)",
    "Fariss_e_miurbani" = "Urbanization",
    "Fariss_gdp_growth" = "GDP growth",
    "factor(v2x_elecregV8)1" = "Regular elections")


## Table D1: Paraguay models

modelsummary(list("Model 1" = paraguay_ols1, 
                  "Model 2" = paraguay_ols2, 
                  "Model 3" = paraguay_ols3),
             stars = TRUE,
             coef_map = paraguay_model_terms,
             gof_omit = "AIC|BIC|Log.Lik",
             output = "kableExtra",
             title = "PFSI and corruption in Paraguay") %>%
  kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
  column_spec(column = 1, width = "5cm") %>%
  column_spec(column = 2:4, width = "2.5cm") %>%
  footnote(general = "Political corruption (V-Dem), forward lagged by five years. Estimator: Ordinary Least Squares with a lagged dependent variable. This table replicates Table 2 in Hummel et al.", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table D2: robustness checks

table_4_models <- list(
  "Executive corruption"  = m1 <- feols(v2x_execorrV8_lead ~ pubfinindexstock1log + v2x_polyarchyV8 + I(v2x_polyarchyV8^2) + Fariss_Maddison_gdppc_1990_ln | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "Legislative corruption" = m2 <- update(m1, v2lgcrrptflip_lead ~. ),
  "Public sector corruption"  = m3 <- update(m1, v2x_pubcorrV8_lead ~.),
  "Judicial corruption"  = m4 <- update(m1, v2jucorrdcflip_lead ~.),
  "Media corruption (placebo)"  = m5 <- update(m1, v2mecorrptflip_lead ~.))

### Create clean model labels for Table D2
model_labels_t4_t5 <- 
  c("pubfinindexstock1log" = "PFSI stock ln",
    "v2elpubfinV8" = "Public subsidies (V-Dem)",
    "v2elpubfinV8_lead" = "Public subsidies (V-Dem), at t",
    "v2eldonateV8_stock_1900_1" = "Disclosure regulations (V-Dem)",
    "pubfin_add" = "PFSI additive",
    "pubfin_add_short" = "PFSI additive: No duplicates",
    "pfsi_recoded" = "PFSI additive: No duplicates & recoded",
    "v2x_polyarchyV8" = "Poliarchy",
    "I(v2x_polyarchyV8^2)" = "Polyarchy²",
    "Fariss_Maddison_gdppc_1990_ln" = "GDP per capita",
    "v2x_elecregV8" = "Regular elections"
    )

## Table D2: robustness checks identical to Table 4 in Hummel et al. article

modelsummary(table_4_models, stars = TRUE,
             coef_map = model_labels_t4_t5,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "PFSI and disaggregated corruption measures") %>%
  kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
  column_spec(column = 1, width = "3.5cm") %>%
  column_spec(column = 2:6, width = "2cm") %>%
  footnote(general = "Corruption in different sectors (V-Dem), forward lagged by five years. Estimator: Ordinary Least Squares panel analysis with two-way fixed effects (unit and time) and standard errors clustered by country. This table replicates Table 4 in Hummel et al.", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)


## Fit models for Table D3: robustness checks 2, replicates Table 5 in Hummel et al. article

table_5_models <- list(
  "Model 1"  = m1 <- feols(v2x_corrV8_lead ~ v2elpubfinV8 + v2x_polyarchyV8 + I(v2x_polyarchyV8^2) + Fariss_Maddison_gdppc_1990_ln | Country + Year, data = pf_data_panel, vcov = "twoway"),
  "Model 2" = m2 <- update(m1, .~. + pubfinindexstock1log + v2eldonateV8_stock_1900_1 - v2elpubfinV8),
  "Model 3"  = m3 <- feols(v2x_corrV8_lead ~ v2elpubfinV8_lead + pubfinindexstock1log | Country + Year, data = pf_data_panel, vcov = "twoway")
)

## Table D3: robustness checks 2, replicates Table 5 in Hummel et al. article

modelsummary(table_5_models, stars = TRUE,
             coef_map = model_labels_t4_t5,
             gof_omit = "AIC|BIC",
             output = "kableExtra",
             title = "Political finance (variously measured) and corruption") %>%
  kable_styling(font_size = 11, full_width = F, latex_options = c("hold_position")) %>%
  column_spec(column = 1, width = "5cm") %>%
  column_spec(column = 2:4, width = "2.5cm") %>%
  footnote(general = "Political corruption (V-Dem). DV is forward lagged by five years except public subsidies in Model 3. Estimator: Ordinary Least Squares panel analysis with two-way fixed effects (unit and time) and standard errors clustered by country. This table replicates Table 5 in Hummel et al.", escape = FALSE, fixed_small_size = TRUE, footnote_as_chunk = TRUE, threeparttable = TRUE)

## Appendix E: Marginal effects of PFSI on corruption by alternative operationalisations of PFSI, region and model specification

### Marginal effects estimation: PFSI stock - full data

twfe_by_region_full_slope <- twfe_by_region_full %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         period = "A: Full data")

### Marginal effects estimation: PFSI Additive - full data

twfe_by_region_simple_slope <- twfe_by_region_simple %>%
  map(., ~plot_slopes(.x, variables = "pubfin_add_l5", condition = "pubfin_add_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive",
         period = "A: Full data")

### Marginal effects estimation: PFSI Additive: No duplicates - full data

twfe_by_region_short_slope <- twfe_by_region_short %>%
  map(., ~plot_slopes(.x, variables = "pubfin_add_short_l5", condition = "pubfin_add_short_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates",
         period = "A: Full data")

### Marginal effects estimation: PFSI Additive: No duplicates & recoded - full data

twfe_by_region_recoded_slope <- twfe_by_region_recoded %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         period = "A: Full data")

### Marginal effects estimation: PFSI stock - Post-1960 data

twfe_stock_cut_slope <- twfe_by_region_stock_cut %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         period = "B: Post-1960 data")

### Marginal effects estimation: PFSI additive - Post-1960 data

twfe_simple_cut_slope <- twfe_by_region_simple_cut %>%
  map(., ~plot_slopes(.x, variables = "pubfin_add_l5", condition = "pubfin_add_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive",
         period = "B: Post-1960 data")

## Marginal effects estimation: PFSI Additive: No duplicates - Post-1960 data

twfe_short_cut_slope <- twfe_by_region_short_cut %>%
  map(., ~plot_slopes(.x, variables = "pubfin_add_short_l5", condition = "pubfin_add_short_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates",
         period = "B: Post-1960 data")

## Marginal effects estimation: PFSI Additive: No duplicates & recoded - Post-1960 data

twfe_recoded_cut_slope <- twfe_by_region_recoded_cut %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         period = "B: Post-1960 data")

## Collect all marginal effects into a data frame to use for Figures E1 and E2

all_slopes_df <- 
  bind_rows(twfe_by_region_full_slope,
            twfe_by_region_simple_slope,
            twfe_by_region_short_slope,
            twfe_by_region_recoded_slope,
            twfe_stock_cut_slope,
            twfe_simple_cut_slope,
            twfe_short_cut_slope,
            twfe_recoded_cut_slope) %>%
  mutate(id = factor(id, levels = c("Global", "Western Europe & North America", "Eastern Europe & Central Asia", "Middle East & North Africa", "Sub-Saharan Africa", "South-East Asia & the Pacific", "Latin America & the Caribbean")),
         corruption_score = 
           case_when(term == "pubfinindexstock1log_l5" ~ pubfinindexstock1log_l5,
                     term == "pubfin_add_l5" ~ pubfin_add_l5,
                     term == "pubfin_add_short_l5" ~ pubfin_add_short_l5, 
                     term == "pfsi_recoded_l5" ~ pfsi_recoded_l5),
         measure_type = factor(measure_type, levels = c("PFSI Stock",  "PFSI Additive", "PFSI Additive: No duplicates", "PFSI Additive: No duplicates & recoded")))  
              
## Figure E1: "Marginal effects of PFSI on corruption by alternative operationalisations of PFSI and region: full sample 1900-2015"

all_slopes_df %>%
  filter(period == "A: Full data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = measure_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = measure_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 2),
         fill = guide_legend(title = "", nrow = 2)) +
  scale_colour_manual(values=natparks.pals("Yellowstone", 4)) + 
  scale_fill_manual(values=natparks.pals("Yellowstone", 4)) + 
  facet_wrap(~id, ncol = 2, scales = "free") +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Stock score", y = "Marginal effects of subsidies on corruption")

## Figure E2: "Marginal effects of PFSI on corruption by alternative operationalisations of PFSI and region: restricted sample 1960-2015"

all_slopes_df %>%
  filter(period == "B: Post-1960 data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = measure_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = measure_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 2),
         fill = guide_legend(title = "", nrow = 2)) +
  scale_colour_manual(values=natparks.pals("Yellowstone", 4)) + 
  scale_fill_manual(values=natparks.pals("Yellowstone", 4)) + 
  facet_wrap(~id, ncol = 2, scales = "free") +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  
  theme(strip.background = element_blank(),
         strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Stock score", y = "Marginal effects of subsidies on corruption")

### Marginal effects estimations for Figures E3 and E4

## Marginal effects estimation for two-way fixed-effects models: PFSI stock - full data

twfe_by_region_full_slope <- twfe_by_region_full %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Two-Way fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for country fixed-effects models: PFSI stock - full data

cfe_country_slope <- twfe_by_region_country %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Country fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for year fixed-effects models: PFSI stock - full data

yfe_year_slope <- twfe_by_region_year %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Year fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for two-way fixed-effects models: PFSI stock - Post-1960 data

twfe_stock_cut_slope <- twfe_by_region_stock_cut %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Two-Way fixed-effects",
         period = "B: Post-1960 data")

## Marginal effects estimation for country fixed-effects models: PFSI stock - Post-1960 data

cfe_country_slope_short <- twfe_by_region_country_sh %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Country fixed-effects",
         period = "B: Post-1960 data")

## Marginal effects estimation for year fixed-effects models: PFSI stock - Post-1960 data

yfe_year_slope_short <- twfe_by_region_year_sh %>%
  map(., ~plot_slopes(.x, variables = "pubfinindexstock1log_l5", condition = "pubfinindexstock1log_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Stock",
         model_type = "Year fixed-effects",
         period = "B: Post-1960 data")

## Marginal effects estimation for two-way fixed-effects models: PFSI Additive: No duplicates & recoded - Full data

twfe_by_region_recoded_slope <- twfe_by_region_recoded %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Two-Way fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for country fixed-effects models: PFSI Additive: No duplicates & recoded - Full data

cfe_country_slope_recoded <- recoded_by_region_country %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Country fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for year fixed-effects models: PFSI Additive: No duplicates & recoded - Full data

yfe_year_slope_recoded <- recoded_by_region_year %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Year fixed-effects",
         period = "A: Full data")

## Marginal effects estimation for two-way fixed-effects models: PFSI Additive: No duplicates & recoded - Post-1960 data

twfe_recoded_cut_slope <- twfe_by_region_recoded_cut %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Two-Way fixed-effects",
         period = "B: Post-1960 data")

## Marginal effects estimation for country fixed-effects models: PFSI Additive: No duplicates & recoded - Post-1960 data

cfe_country_slope_recoded_short <- recoded_by_region_country_sh %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Country fixed-effects",
         period = "B: Post-1960 data")

## Marginal effects estimation for year fixed-effects models: PFSI Additive: No duplicates & recoded - Post-1960 data

yfe_year_slope_recoded_short <- recoded_by_region_year_sh %>%
  map(., ~plot_slopes(.x, variables = "pfsi_recoded_l5", condition = "pfsi_recoded_l5", slope = "dyex", draw = FALSE)) %>%
  bind_rows(.id = "id") %>%
  mutate(measure_type = "PFSI Additive: No duplicates & recoded",
         model_type = "Year fixed-effects",
         period = "B: Post-1960 data")

## Collect all marginal effects into a data frame to use for Figures E3 and E4
model_type_slopes <- 
  bind_rows(twfe_by_region_full_slope,
            cfe_country_slope,
            yfe_year_slope,
            twfe_stock_cut_slope,
            cfe_country_slope_short,
            yfe_year_slope_short,
            twfe_by_region_recoded_slope,
            cfe_country_slope_recoded,
            yfe_year_slope_recoded,
            twfe_recoded_cut_slope,
            cfe_country_slope_recoded_short,
            yfe_year_slope_recoded_short) %>%
  mutate(id = factor(id, levels = c("Global", "Western Europe & North America", "Eastern Europe & Central Asia", "Middle East & North Africa", "Sub-Saharan Africa", "South-East Asia & the Pacific", "Latin America & the Caribbean")),
         corruption_score = 
           case_when(term == "pubfinindexstock1log_l5" ~ pubfinindexstock1log_l5,
                     term == "pfsi_recoded_l5" ~ pfsi_recoded_l5),
         measure_type = factor(measure_type, levels = c("PFSI Stock", "PFSI Additive: No duplicates & recoded")),
         model_type = factor(model_type, levels = c("Country fixed-effects", "Year fixed-effects", "Two-Way fixed-effects")))  


## Figure E3: "Marginal effects of PFSI on corruption by alternative model specifications, PFSI Stock: 1900-2015"

model_type_slopes %>%
  filter(measure_type == "PFSI Stock", period == "A: Full data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = model_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = model_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 1),
         fill = guide_legend(title = "", nrow = 1)) +
  scale_colour_manual(values=natparks.pals("Banff", 3)) +
  scale_fill_manual(values=natparks.pals("Banff", 3)) +
  facet_wrap(~id, scales= "free_y", ncol = 2) +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Stock score", y = "Marginal effects of PFSI on corruption with 95% CI")

## Figure E4: "Marginal effects of PFSI on corruption by alternative model specifications: PFSI Stock, 1960-2015"

model_type_slopes %>%
  filter(measure_type == "PFSI Stock", period == "B: Post-1960 data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = model_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = model_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 1),
         fill = guide_legend(title = "", nrow = 1)) +
  scale_colour_manual(values=natparks.pals("Banff", 3)) +
  scale_fill_manual(values=natparks.pals("Banff", 3)) +
  facet_wrap(~id, scales= "free_y", ncol = 2) +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Stock score", y = "Marginal effects of PFSI on corruption with 95% CI")


## Figure E5: "Marginal effects of PFSI on corruption by alternative model specifications: PFSI Additive No duplicates & recoded, 1900-2015"

model_type_slopes %>%
  filter(measure_type == "PFSI Additive: No duplicates & recoded", period == "A: Full data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = model_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = model_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 1),
         fill = guide_legend(title = "", nrow = 1)) +
  scale_colour_manual(values=natparks.pals("Banff", 3)) +
  scale_fill_manual(values=natparks.pals("Banff", 3)) +
  facet_wrap(~id, scales= "free_y", ncol = 2) +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Additive: No duplicates & recoded", y = "Marginal effects of PFSI on corruption with 95% CI")

## Figure E6: "Marginal effects of PFSI on corruption by alternative model specifications, PFSI Additive: No duplicates & recoded: 1960-2015"

model_type_slopes %>%
  filter(measure_type == "PFSI Additive: No duplicates & recoded", period == "B: Post-1960 data") %>%
  ggplot(aes(x = corruption_score, y = estimate)) +
  geom_line(aes(colour = model_type)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = model_type), alpha = 0.2) +
  guides(colour = guide_legend(title = "", nrow = 1),
         fill = guide_legend(title = "", nrow = 1)) +
  scale_colour_manual(values=natparks.pals("Banff", 3)) +
  scale_fill_manual(values=natparks.pals("Banff", 3)) +
  facet_wrap(~id, scales= "free_y", ncol = 2) +
  geom_hline(yintercept = 0, linetype = 5, colour = "grey60") +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey30", face = "bold", size = 12),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12),
        legend.position = "top",
        legend.text = element_text(size = 12),
        legend.margin = margin(t = 0, r = 0, b = -0.6, l = 0, unit = "lines")
  ) +
  labs(x = "PFSI Additive: No duplicates & recoded", y = "Marginal effects of PFSI on corruption with 95% CI")

## Appendix F: Relationship between PFSI and political corruption across regions by alternative PFSI operationalisations in raw data

## Figure F1: "Relationship between PFSI and political corruption across regions: PFSI Additive Score, 1900-2015"

pf_data %>%
  mutate(region_new = 
           case_when(region_new == "South-East Asia & the Pacific" ~ "South-East Asia & the Pacific",
                     region_new == "Eastern Europe and Central Asia (post-Communist)" ~ "Eastern Europe & Central Asia",
                     region_new == "Latin America & the Caribbean" ~ "Latin America & the Caribbean",
                     region_new == "MENA (Middle East & North Africa)" ~ "Middle East & North Africa",
                     region_new == "Sub-Saharan Africa" ~ "Sub-Saharan Africa",
                     region_new == "Western Europe and North America" ~ "Western Europe & North America"), 
         region_new = factor(region_new, 
                             levels = c("Eastern Europe & Central Asia", 
                                        "Middle East & North Africa",
                                        "Sub-Saharan Africa",
                                        "South-East Asia & the Pacific",
                                        "Latin America & the Caribbean",
                                        "Western Europe & North America"))) %>%
  drop_na(pubfin_add) %>%
  ggplot(aes(x = pubfin_add, y = v2x_corrV8, fill = factor(pubfin_add))) +
  geom_boxplot(notch = TRUE, notchwidth = 0.75) +
  scale_x_continuous(breaks = seq(0, 5, 1)) +
  scale_fill_manual(values=natparks.pals("Yellowstone", 6)) + 
  facet_wrap(~region_new) +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey20", face = "bold", size = 11),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12)) +
  labs(x = "PFSI Additive Score", y = "Political Corruption (V-DEM)")

## Figure F2: "Relationship between PFSI and political corruption across regions: PFSI Additive Score without duplicates and recoded, 1900-2015"

pf_data %>%
  mutate(region_new = 
           case_when(region_new == "South-East Asia & the Pacific" ~ "South-East Asia & the Pacific",
                     region_new == "Eastern Europe and Central Asia (post-Communist)" ~ "Eastern Europe & Central Asia",
                     region_new == "Latin America & the Caribbean" ~ "Latin America & the Caribbean",
                     region_new == "MENA (Middle East & North Africa)" ~ "Middle East & North Africa",
                     region_new == "Sub-Saharan Africa" ~ "Sub-Saharan Africa",
                     region_new == "Western Europe and North America" ~ "Western Europe & North America"), 
         region_new = factor(region_new, 
                             levels = c("Eastern Europe & Central Asia", 
                                        "Middle East & North Africa",
                                        "Sub-Saharan Africa",
                                        "South-East Asia & the Pacific",
                                        "Latin America & the Caribbean",
                                        "Western Europe & North America"))) %>%
  drop_na(pubfin_add) %>%
  ggplot(aes(x = pfsi_recoded, y = v2x_corrV8, fill = factor(pfsi_recoded))) +
  geom_boxplot(notch = TRUE, notchwidth = 0.75) +
  scale_x_continuous(breaks = seq(0, 5, 1)) +
  scale_fill_manual(values=natparks.pals("Yellowstone", 5)) + 
  facet_wrap(~region_new) +
  theme(strip.background = element_blank(),
        strip.text = element_text(colour = "grey20", face = "bold", size = 11),
        axis.title = element_text(size = 12),
        axis.text = element_text(size = 12)) +
  labs(x = "PFSI Additive: No duplicates & recoded", y = "Political Corruption (V-DEM)")


